
* +++++++++++++++++++++
* CLEAN LABOR MARKET STATS
* +++++++++++++++++++++

/*
source: 
https://statistik.arbeitsagentur.de/SiteGlobals/Forms/Suche/Einzelheftsuche_Formular.html?topic_f=beschaeftigung-eu-heft-eu-heft
*/

global data_raw_lm "${data_raw}/labor_market_stats/"

* Import sheet "Tab5" from each quarterly workbook (months 03, 06, 09, 12 of
* 2015-2020), keep the selected nationality groups, reshape to one row per
* ags, and save one intermediate .dta file per quarter.
* Workbooks that do not exist on disk are skipped.
forval year = 2015/2020 {
	foreach month in 03 06 09 12 {
		local infile "${data_raw_lm}/eu-heft-d-0-`year'`month'-xlsx.xlsx"
		cap confirm file "`infile'"
		if !_rc {
			import excel using "`infile'", clear sheet("Tab5")
			
			* make sure the layout of this file matches what the code below
			* relies on (header labels in the expected cells)
			assert (A[3] == "Staatsangehörigkeit" | A[3] == "K_Staaten") & ///
				B[4] == "Insgesamt"
			
			* give meaningful variable names
			ren A country_of_origin 
			ren B ags
			ren C place_of_work
			ren D num_employed
			ren E frac_of_total_emp
			ren J num_employed_min
			ren K frac_of_total_emp_min
			ren P num_in_train
			ren Q frac_of_total_in_train
			
			* focus on main data: drop the four header rows and the
			* overall-total rows
			drop if _n <=4 | ags == "Insgesamt"
			destring ags, replace
			* keep only the renamed columns; this does not depend on how many
			* (unused) columns the workbook happens to contain
			keep country_of_origin ags place_of_work num_* frac_*
			
			* focus on selected countries; could easily add more here!
			keep if country_of_origin == "Arabische Republik Syrien" | ///
				country_of_origin == "Asylherkunftsländer" | ///
				country_of_origin == "Ausländer" | ///
				country_of_origin == "Deutsche" | ///
				country_of_origin == "Insgesamt" | ///
				country_of_origin == "EU insgesamt"

			* recode the German labels into short English suffixes that are
			* valid as parts of variable names after the reshape
			replace country_of_origin = "syria" ///
				if country_of_origin == "Arabische Republik Syrien"
			replace country_of_origin = "asylum" ///
				if country_of_origin == "Asylherkunftsländer"
			replace country_of_origin = "foreign" ///
				if country_of_origin == "Ausländer"
			replace country_of_origin = "german" ///
				if country_of_origin == "Deutsche"
			replace country_of_origin = "total" ///
				if country_of_origin == "Insgesamt"
			replace country_of_origin = "eu" ///
				if country_of_origin == "EU insgesamt"
			
			* reshape wide on country
			* append "_" to every num_* name (built-in group rename) so the
			* reshaped variables read e.g. num_employed_syria
			rename num_* num_*_
			drop frac_*
			* force: non-numeric cells become missing
			destring num_*, replace force
			reshape wide num_*, i(ags) j(country_of_origin) string

			gen year = `year' 
			gen month = `month' 
			
			* clean up and save
			order ags place_of_work year month, first
			* every quarter must contain exactly 401 rows
			* (presumably the number of German districts -- TODO confirm)
			qui: count
			assert `r(N)' == 401
			
			save "${data_derived}/labor_market_stats_`year'_`month'.dta", replace
		}
	}
}

* combine files
* Start from an empty dataset and append every intermediate file that exists.
* This neither requires the 2015_03 file to be present nor loads any file
* twice.
clear
forval year = 2015/2020 {
	foreach month in 03 06 09 12 {
		cap confirm file "${data_derived}/labor_market_stats_`year'_`month'.dta"
		if !_rc {
			append using ///
				"${data_derived}/labor_market_stats_`year'_`month'.dta"
		}
	}
}

* stop with a clear message if there was nothing to combine
if _N == 0 {
	display as error "no intermediate labor_market_stats files found in ${data_derived}"
	exit 601
}

* each ags-year-month combination must occur exactly once; fail loudly
* instead of silently dropping unexpected duplicates
isid ags year month, missok
sort ags year month

* save
save "${data_derived}/labor_market_stats_2015_to_2020.dta", replace

* clean up: delete the per-quarter intermediate files; quarters for which no
* file exists are skipped
foreach year of numlist 2015/2020 {
	foreach month in 03 06 09 12 {
		local tmpfile "${data_derived}/labor_market_stats_`year'_`month'.dta"
		capture confirm file "`tmpfile'"
		if _rc == 0 {
			erase "`tmpfile'"
		}
	}
}

